#############################################################################
# Figure3.R
# Aim: to replicate Figure 3 in Atsusaka and Stevenson (2021)
# Run time: about 20 seconds
#############################################################################

library(tidyverse)
library(scales)
rm(list=ls())
start <- Sys.time()

pdf("Figure3.pdf", width=10, height=6.7)
par(mfrow=c(2,3), mar=c(2,2,3,2), oma=c(3.5,4,0,0), mgp=c(3,0.6,0))
set.seed(123)

#############################################################################
# (1) DATA GENERATING PROCESS
#############################################################################
# PARAMETERS
N = 2000        # Sample size
p = 0.15        # Randomization probability
p2 = 0.15       # Anchor randomization probability
inat.vec <- seq(from=0, to=1, by=0.1) # % INAttentiveness vector
pi.att = 0.05    # True prevelance FOR ATTENTIVE RESPONSES

pi.ina.vec = c(0.05, 0.1,0.2,0.3,0.4,0.45)

for(k in seq_along(pi.ina.vec)){
pi.ina = pi.ina.vec[k]     # True prevalence FOR INATTENTIVE RESPONSE

# STORAGE OF ESTIMATES
pi.hat.naive <- rep(NA, 11)
naive.low <- rep(NA, 11)
naive.high <- rep(NA, 11)
B <- rep(NA, 11)
pi.hat.bc <- rep(NA, 11)
bc.low <- rep(NA, 11)
bc.high <- rep(NA, 11)
true.prev <- rep(NA, 11)

for(i in seq_along(inat.vec)){
gamma = 1 - inat.vec[i]                             # Attentive rate

Attentive = rbinom(n=N, size=1, prob=gamma)         # Attentive R or not
N.att = sum(Attentive) # Number of ATTENTIVE RESPONSES
N.ina = N - N.att      # Number of INATTENTIVE RESPONSES
true.prev[i] = pi.att*gamma + pi.ina*(1-gamma)

# SENSITIVE QUESTION OF INTERST
StatementA.att = rbinom(n=N.att, size=1, prob=pi.att) # Sesitive item
StatementA.ina = rbinom(n=N.ina, size=1, prob=pi.ina) # Sesitive item
StatementA = c(StatementA.att, StatementA.ina)        # Combine
StatementB = rbinom(n=N, size=1, prob=p)            # Randomization item
True.res = ifelse(StatementA != StatementB, 0, 1)   # Survey answer 

Obs.res = rep(NA, N)
Obs.res[Attentive==1] = True.res[Attentive==1]      # Observed answer for attentive Rs
Obs.res[Attentive==0] = rbinom(n=length(N[Attentive==0]),
                               size=1, prob=0.5)    # Observed answer, otherwise


# NON-SENSITIVE AUXIALLY QUESTION
StatementC = 0                                      # Attention "c"heck item (True prevalence is 0 for everyone)
StatementD = rbinom(n=N, size=1, prob=p2)           # Anchor question
True.res.prime = ifelse(StatementC != StatementD, 0, 1)   # Survey answer in Anchor Question

Obs.res.prime = rep(NA, N)
Obs.res.prime[Attentive==1] = True.res.prime[Attentive==1] # Observed answer for attentive Rs
Obs.res.prime[Attentive==0] = rbinom(n=length(N[Attentive==0]),
                                     size=1, prob=0.5)            # Observed answer, otherwise


dat <- cbind(Obs.res, True.res, Attentive, Obs.res.prime, True.res.prime)
dat <- as_tibble(dat)
head(dat) # See the D


#############################################################################
# (2) NAIVE CROSSWISE MODEL
#############################################################################
lambda.hat = mean(Obs.res) # Observed proportion of YESYES or NONO
pi.hat.naive[i] = (lambda.hat+p-1)/(2*p-1)

pi.hat.naive.var = (pi.hat.naive[i]*(1-pi.hat.naive[i]))/(N-1) +
                   (p*(1-p))/((N-1)*((2*p-1)^2))
pi.hat.naive.sd = sqrt(pi.hat.naive.var)     

naive.low[i]  = pi.hat.naive[i]-1.96*pi.hat.naive.sd; naive.low  = ifelse(naive.low > 0,  naive.low, 0)
naive.high[i] = pi.hat.naive[i]+1.96*pi.hat.naive.sd; naive.high = ifelse(naive.high < 1, naive.high, 1)


B[i] = (gamma-1)/(2*gamma)*((lambda.hat-0.5)/(p-0.5))


#############################################################################
# (3) BIAS-CORRECTED
#############################################################################

gamma.hat = (mean(Obs.res.prime)-0.5)/(0.5-p2) # Estimated level of inattentiveness
Bias.hat = (1/2)*((lambda.hat-0.5)/(p-0.5)) - (1/(2*gamma.hat))*((lambda.hat-0.5)/(p-0.5))
Bias.hat

pi.hat.bc[i] = pi.hat.naive[i] - Bias.hat    # Bias Correction
pi.hat.bc[i] = min(1, max(pi.hat.bc[i], 0))  # Logical bound restrain


# BOOTSTRAPPING (1000 TIMES)
bs <- NA
for(j in 1:1000){
  index <- sample(1:nrow(dat), size=dim(dat), replace=TRUE)   # RESAMPLE WITH REPLACEMENT
  bs.dat <- dat[index,]                                       # BOOTSTRAPPED DATA
  
  bs.lambda.hat = mean(bs.dat$Obs.res)                    # Observed proportion of YESYES or NONO
  bs.pi.hat.naive = (bs.lambda.hat+p-1)/(2*p-1)
  bs.gamma.hat = (mean(bs.dat$Obs.res.prime)-0.5)/(0.5-p) # Estimated level of inattentiveness
  bs.bias.hat = (1/2)*((bs.lambda.hat-0.5)/(p-0.5)) - (1/(2*bs.gamma.hat))*((bs.lambda.hat-0.5)/(p-0.5)) 
  
  bs[j] = bs.pi.hat.naive - bs.bias.hat # Bias Correction within Bootstrapping
  bs[j] = min(1, max(bs[j], 0))         # Logical bound restrain
    
  }

pi.hat.bc.var = var(bs)
pi.hat.bc.sd = sqrt(pi.hat.bc.var)

bc.low[i]  = quantile(bs, prob=0.025, na.rm = T); bc.low  = ifelse(bc.low > 0,  bc.low, 0)
bc.high[i] = quantile(bs, prob=0.975, na.rm = T); bc.high = ifelse(bc.high < 1, bc.high, 1)

}


#############################################################################
# (4) VISUALIZATION
#############################################################################

inat.vec2 <- inat.vec*100


plot(pi.hat.bc ~ inat.vec2, ylim=c(0,0.6), type="n", las=1,
     ylab="",
     xlab="",
     cex.axis=1.5)

# NAIVE ESTIMATOR
naive_y <- c(naive.low[1], naive.high, rev(naive.low))
naive_x <- c(inat.vec2[1], inat.vec2, rev(inat.vec2))
polygon(naive_x, naive_y, col=alpha(gray(0.9),0.8), border=NA)
lines(pi.hat.naive ~ inat.vec2, lwd=1, col=alpha("black",0.7))

# BIAS-CORRECTED
lines(pi.hat.bc ~ inat.vec2, lwd=3, col="firebrick4")
lines(bc.high ~ inat.vec2, lty=1, lwd=0.5,col=alpha("dimgray",0.3))
lines(bc.low ~ inat.vec2, lty=1, lwd=0.5,col=alpha("dimgray",0.3))


points(0,true.prev[1], pch=0, cex=2.5, col="gray50")
points(100,true.prev[11], pch=15, cex=2.5, col="gray50")
title(substitute(paste(pi[inattentive], sep=" = ", v), list(v=pi.ina)), 
      font=2, cex.main=2, line=1.2)

lines(true.prev ~ inat.vec2, col="gray50", lwd=2, lty=2)

if(pi.ina==0.05){

text(x=60, y=0.55, labels="Bias-corrected", font=1, cex=1.8, col="firebrick4")
text(x=30, y=0.4, labels="Naive estimator", font=1, cex=1.8, col=alpha("dimgray",0.9))
arrows(x0=30,x1=30,y0=0.35,y1=0.23,length=0.1,col="dimgray")
 

}else if(pi.ina==0.1){
txcol="dimgray"
text(x=15, y=0.55, labels= bquote("n" == 2000), cex=1.5, col=txcol)
text(x=10, y=0.5, labels= bquote(paste(pi,"'") == 0), cex=1.5, col=txcol)
text(x=14, y=0.45, labels= bquote("p" == 0.15), cex=1.5, col=txcol)
text(x=14.5, y=0.4, labels= bquote("p'" == 0.15), cex=1.5, col=txcol)  
 
}else if(pi.ina==0.3){
  
points(x=5,y=0.55,pch=0,cex=3,col="dimgray")  
points(x=5,y=0.5,pch=15,cex=3,col="dimgray")  
text(x=26,y=0.55,cex=1.5, labels="Attentives",col="dimgray")
text(x=28,y=0.5,cex=1.5,labels="Inattentives",col="dimgray")
  
}else{} # DO NOTHING

}

mtext(side=1, line=2, outer=T, font=1, cex=1.5,
      text="Percentage of Inattentive Respondents")
mtext(side=2, line=2, outer=T, font=1, cex=1.5, las=0, 
      text="Prevalence of Sensitive Attributes")

dev.off()


end <- Sys.time()
start - end

#############################################################################
# END OF THIS R SOURCE FILE
#############################################################################